Data Preparation

library(vars)
library(lmtest) # линейные регрессии 
library("ggplot2") # графики
library("dplyr") # манипуляции с таблицами
library(glmnet) # линейные регрессии 
library(tidyverse)# обработка данных
library(car)# для vif

totaldannye <-read.csv("totaldannye.csv", header = TRUE) 

view(totaldannye)
summary(totaldannye)
##     region               time       plan_value_hps plan_value_tps   
##  Length:118848      Min.   : 0.00   Min.   :   0   Min.   :  307.7  
##  Class :character   1st Qu.: 5.75   1st Qu.:   0   1st Qu.: 2388.8  
##  Mode  :character   Median :11.50   Median :  21   Median : 3041.8  
##                     Mean   :11.50   Mean   :1660   Mean   : 4046.8  
##                     3rd Qu.:17.25   3rd Qu.:1682   3rd Qu.: 4720.3  
##                     Max.   :23.00   Max.   :8056   Max.   :12596.5  
##   tech_min_hps      tech_min_tps    technology_min_hps technology_min_tps
##  Min.   :-1320.0   Min.   : 214.5   Min.   :   0.0     Min.   : 272.5    
##  1st Qu.: -104.8   1st Qu.:1360.0   1st Qu.:   0.0     1st Qu.:2016.1    
##  Median :    0.0   Median :1953.3   Median :   0.0     Median :2592.9    
##  Mean   : -276.2   Mean   :2675.2   Mean   : 495.0     Mean   :3276.5    
##  3rd Qu.:    1.0   3rd Qu.:3246.3   3rd Qu.: 576.8     3rd Qu.:4255.9    
##  Max.   :  220.4   Max.   :7814.6   Max.   :2835.2     Max.   :9861.6    
##   tech_max_hps     tech_max_tps       plan_value       plan_exp      
##  Min.   :   0.0   Min.   :  442.5   Min.   : 2129   Min.   :  33.77  
##  1st Qu.:  52.5   1st Qu.: 2762.7   1st Qu.: 3238   1st Qu.: 401.58  
##  Median : 650.0   Median : 3404.2   Median : 4667   Median : 647.62  
##  Mean   :2332.2   Mean   : 4717.2   Mean   : 6622   Mean   : 789.77  
##  3rd Qu.:2500.4   3rd Qu.: 5800.7   3rd Qu.: 9051   3rd Qu.:1071.03  
##  Max.   :9045.0   Max.   :13579.7   Max.   :19062   Max.   :2536.07  
##     plan_imp        price_demand   price_supply  ov_plan_value  
##  Min.   :  17.36   Min.   :   0   Min.   :   0   Min.   : 2294  
##  1st Qu.: 968.31   1st Qu.:1046   1st Qu.:1012   1st Qu.: 3928  
##  Median :1533.54   Median :1378   Median :1340   Median : 5192  
##  Mean   :2332.96   Mean   :1358   Mean   :1321   Mean   : 7096  
##  3rd Qu.:3220.07   3rd Qu.:1652   3rd Qu.:1625   3rd Qu.: 9270  
##  Max.   :8165.45   Max.   :3169   Max.   :2923   Max.   :19546  
##      date                infl       price_demand_real price_supply_real
##  Length:118848      Min.   :1.000   Min.   :   0.0    Min.   :   0     
##  Class :character   1st Qu.:1.070   1st Qu.: 897.3    1st Qu.: 865     
##  Mode  :character   Median :1.221   Median :1154.8    Median :1128     
##                     Mean   :1.186   Mean   :1146.0    Mean   :1115     
##                     3rd Qu.:1.271   3rd Qu.:1396.6    3rd Qu.:1375     
##                     Max.   :1.410   Max.   :2685.9    Max.   :2568     
##   real_usd_rub     oil_rub_real   coal_rub_real     al_rub_real  
##  Min.   :0.5879   Min.   : 2710   Min.   : 10836   Min.   :1601  
##  1st Qu.:0.8600   1st Qu.: 3765   1st Qu.: 17273   1st Qu.:1787  
##  Median :0.9039   Median : 4517   Median : 20727   Median :1987  
##  Mean   :0.8998   Mean   : 4566   Mean   : 53996   Mean   :2106  
##  3rd Qu.:0.9708   3rd Qu.: 5101   3rd Qu.: 37538   3rd Qu.:2377  
##  Max.   :1.4238   Max.   :10268   Max.   :257904   Max.   :3404  
##  steel_rub_real     weather_t         weather_u        weather_t3     
##  Min.   : 32419   Min.   :-25.750   Min.   :  6.69   Min.   :-25.770  
##  1st Qu.: 54296   1st Qu.: -4.200   1st Qu.: 34.02   1st Qu.: -4.020  
##  Median : 69137   Median :  1.195   Median : 40.39   Median :  1.401  
##  Mean   : 75136   Mean   :  1.700   Mean   : 46.14   Mean   :  2.022  
##  3rd Qu.: 97563   3rd Qu.:  7.450   3rd Qu.: 48.11   3rd Qu.:  7.805  
##  Max.   :135128   Max.   : 31.740   Max.   :100.00   Max.   : 33.070  
##    weather_u3        weather_t6        weather_u6      weather_t12     
##  Min.   :  5.322   Min.   :-25.290   Min.   :  5.53   Min.   :-24.790  
##  1st Qu.: 32.528   1st Qu.: -4.028   1st Qu.: 32.66   1st Qu.: -4.004  
##  Median : 40.110   Median :  1.420   Median : 39.97   Median :  1.484  
##  Mean   : 45.043   Mean   :  2.023   Mean   : 45.04   Mean   :  2.023  
##  3rd Qu.: 47.680   3rd Qu.:  7.820   3rd Qu.: 47.46   3rd Qu.:  7.898  
##  Max.   :100.000   Max.   : 32.650   Max.   :100.00   Max.   : 31.740  
##   weather_u12      weather_t24       weather_u24      day_of_the_week   
##  Min.   :  6.69   Min.   :-23.690   Min.   :  8.109   Length:118848     
##  1st Qu.: 32.83   1st Qu.: -4.013   1st Qu.: 33.120   Class :character  
##  Median : 39.60   Median :  1.533   Median : 39.150   Mode  :character  
##  Mean   : 45.04   Mean   :  2.021   Mean   : 45.057                     
##  3rd Qu.: 47.06   3rd Qu.:  8.007   3rd Qu.: 46.776                     
##  Max.   :100.00   Max.   : 28.200   Max.   :100.000                     
##     holiday     month                year     
##  Min.   :0   Length:118848      Min.   :2020  
##  1st Qu.:0   Class :character   1st Qu.:2021  
##  Median :0   Mode  :character   Median :2022  
##  Mean   :0                      Mean   :2022  
##  3rd Qu.:0                      3rd Qu.:2023  
##  Max.   :0                      Max.   :2024
totaldannye$time <- as.character(totaldannye$time)
totaldannye$day_of_the_week = substr(totaldannye$day_of_the_week, 1, 1)
totaldannye$day_of_the_week <- as.character(totaldannye$day_of_the_week)

totaldannye <- subset(totaldannye,select=-price_demand)
totaldannye <- subset(totaldannye,select=-price_supply)
totaldannye <- subset(totaldannye,select=-plan_value) 
totaldannye <- subset(totaldannye,select=-month)
totaldannye <- subset(totaldannye,select=-year)
#totaldannye <- subset(totaldannye,select=-infl)
totaldannye <- subset(totaldannye,select=-technology_min_hps)
totaldannye <- subset(totaldannye,select=-technology_min_tps)
totaldannye <- subset(totaldannye,select=-weather_t3)
totaldannye <- subset(totaldannye,select=-weather_t6)
totaldannye <- subset(totaldannye,select=-weather_t12)
totaldannye <- subset(totaldannye,select=-weather_t24)
totaldannye <- subset(totaldannye,select=-weather_u3)
totaldannye <- subset(totaldannye,select=-weather_u6)
totaldannye <- subset(totaldannye,select=-weather_u12)
totaldannye <- subset(totaldannye,select=-weather_u24)

# Modified function to replace outliers with neighbor averages
smooth_outliers <- function(region_data, region_name) {
  # Select only numeric columns
  numeric_cols <- sapply(region_data, is.numeric)
  numeric_data <- region_data[, numeric_cols]
  
  # Create directory for plots if it doesn't exist
  if (!dir.exists("boxplots")) {
    dir.create("boxplots")
  }
  
  # Create boxplots for each numeric variable and save to file
  pdf(file = paste0("boxplots/", region_name, "_boxplots.pdf"), width = 11, height = 8)
  par(mfrow = c(3, 3))
  for (col in names(numeric_data)) {
    boxplot(numeric_data[[col]], main = paste(region_name, col))
  }
  dev.off()
  
  # Replace outliers with neighbor averages (3-point moving average)
  smoothed_data <- region_data
  for (col in names(numeric_data)) {
    if (length(unique(numeric_data[[col]])) > 1) { # Skip if constant
      # Identify outliers
      lower_bound <- quantile(numeric_data[[col]], 0.005, na.rm = TRUE)
      upper_bound <- quantile(numeric_data[[col]], 0.995, na.rm = TRUE)
      outliers <- which(numeric_data[[col]] < lower_bound | numeric_data[[col]] > upper_bound)
      
      # Replace outliers with moving average
      for (i in outliers) {
        start_idx <- max(1, i-1)
        end_idx <- min(nrow(numeric_data), i+1)
        neighbor_values <- numeric_data[[col]][start_idx:end_idx]
        valid_neighbors <- neighbor_values[!neighbor_values %in% numeric_data[[col]][outliers]]
        
        if (length(valid_neighbors) > 0) {
          smoothed_data[[col]][i] <- mean(valid_neighbors, na.rm = TRUE)
        } else {
          smoothed_data[[col]][i] <- NA # If no valid neighbors, set to NA
        }
      }
    }
  }
  
  # Remove any remaining NAs
  smoothed_data <- na.omit(smoothed_data)
  cat(paste("Smoothed outliers in", region_name, ":", length(outliers), "points\n"))
  
  return(smoothed_data)
}

# Apply to each region (replacing the previous clean_region_data calls)
Chelyabinsk_Oblast <- smooth_outliers(
  subset(totaldannye, region == "Челябинская область"), 
  "Chelyabinsk"
)
## Smoothed outliers in Chelyabinsk : 289 points
Irkutsk_Oblast <- smooth_outliers(
  subset(totaldannye, region == "Иркутская область"), 
  "Irkutsk"
)
## Smoothed outliers in Irkutsk : 272 points
Republic_of_Tatarstan <- smooth_outliers(
  subset(totaldannye, region == "Республика Татарстан"), 
  "Tatarstan"
)
## Smoothed outliers in Tatarstan : 298 points
Moscow_Oblast <- smooth_outliers(
  subset(totaldannye, region == "Московская область"), 
  "Moscow"
)
## Smoothed outliers in Moscow : 290 points

Chelyabinsk Oblast Analysis

core_vars <- Chelyabinsk_Oblast[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]

# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const")  # Test lags 1 to 35

plot(var_model)

# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection)  # Shows optimal lags by AIC, BIC, etc.
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     29     28     27     29
M1_D <- lm(data = Chelyabinsk_Oblast, price_demand_real ~ time + plan_value_hps + plan_value_tps + tech_min_hps + tech_min_tps + tech_max_hps + tech_max_tps + plan_exp + plan_imp + ov_plan_value + date + infl + price_supply_real + real_usd_rub + oil_rub_real + coal_rub_real + al_rub_real + steel_rub_real + weather_t + weather_u +day_of_the_week + holiday)  

for (i in 1:35) {
  Chelyabinsk_Oblast[[paste0("price_demand_real_lag", i)]] <- lag(Chelyabinsk_Oblast$price_demand_real, i)
}

# Remove rows with NA values (created by lagging)
Chelyabinsk_Oblast_clean <- na.omit(Chelyabinsk_Oblast)

# Instead of using all planning variables, create:
Chelyabinsk_Oblast_clean$net_plan_effect <- with(Chelyabinsk_Oblast_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)

# Replace individual tech variables with:
Chelyabinsk_Oblast_clean$tech_avg_tps <- with(Chelyabinsk_Oblast_clean, tech_min_tps/2 + tech_max_tps/2)
Chelyabinsk_Oblast_clean$tech_range_tps <- with(Chelyabinsk_Oblast_clean, tech_max_tps - tech_min_tps)

# Replace individual tech variables with:
Chelyabinsk_Oblast_clean$tech_avg_hps <- with(Chelyabinsk_Oblast_clean, tech_min_hps/2 + tech_max_hps/2)
Chelyabinsk_Oblast_clean$tech_range_hps <- with(Chelyabinsk_Oblast_clean, tech_max_hps - tech_min_hps)

lag_terms_Chelyabinsk_Oblast <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Chelyabinsk_Oblast <- "price_demand_real ~ time + net_plan_effect +tech_avg_tps + infl + real_usd_rub + oil_rub_real + coal_rub_real + al_rub_real + steel_rub_real + weather_t + weather_u + day_of_the_week + price_demand_real_lag1 + price_demand_real_lag24"

# Fit the model
M1_D_lagged <- lm(formula = base_formula_Chelyabinsk_Oblast, data = Chelyabinsk_Oblast_clean)
summary(M1_D_lagged)
## 
## Call:
## lm(formula = base_formula_Chelyabinsk_Oblast, data = Chelyabinsk_Oblast_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -474.64  -18.49    0.27   18.39  476.07 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.621e+01  8.552e+00   3.065 0.002179 ** 
## time1                   -4.873e+00  1.708e+00  -2.853 0.004336 ** 
## time10                   7.040e+01  1.906e+00  36.938  < 2e-16 ***
## time11                   7.258e+01  1.906e+00  38.088  < 2e-16 ***
## time12                   6.493e+01  1.905e+00  34.083  < 2e-16 ***
## time13                   7.339e+01  1.901e+00  38.602  < 2e-16 ***
## time14                   7.613e+01  1.905e+00  39.975  < 2e-16 ***
## time15                   7.401e+01  1.912e+00  38.710  < 2e-16 ***
## time16                   7.638e+01  1.915e+00  39.885  < 2e-16 ***
## time17                   6.378e+01  1.917e+00  33.269  < 2e-16 ***
## time18                   5.706e+01  1.909e+00  29.887  < 2e-16 ***
## time19                   5.430e+01  1.898e+00  28.613  < 2e-16 ***
## time2                    1.419e+01  1.729e+00   8.203 2.45e-16 ***
## time20                   2.474e+01  1.887e+00  13.112  < 2e-16 ***
## time21                   7.607e+00  1.846e+00   4.122 3.77e-05 ***
## time22                  -4.165e+01  1.791e+00 -23.262  < 2e-16 ***
## time23                  -5.404e+01  1.711e+00 -31.575  < 2e-16 ***
## time3                    3.353e+01  1.737e+00  19.301  < 2e-16 ***
## time4                    6.612e+01  1.734e+00  38.119  < 2e-16 ***
## time5                    1.066e+02  1.721e+00  61.911  < 2e-16 ***
## time6                    1.697e+02  1.726e+00  98.284  < 2e-16 ***
## time7                    1.569e+02  1.756e+00  89.313  < 2e-16 ***
## time8                    1.394e+02  1.820e+00  76.620  < 2e-16 ***
## time9                    9.868e+01  1.883e+00  52.414  < 2e-16 ***
## net_plan_effect          3.464e-02  8.969e-03   3.862 0.000113 ***
## tech_avg_tps            -5.195e-03  1.088e-03  -4.774 1.81e-06 ***
## infl                     1.289e+01  4.199e+00   3.069 0.002149 ** 
## real_usd_rub            -5.970e-01  5.830e+00  -0.102 0.918437    
## oil_rub_real            -1.722e-04  5.767e-04  -0.299 0.765226    
## coal_rub_real           -2.702e-05  8.047e-06  -3.358 0.000785 ***
## al_rub_real              2.455e-03  1.520e-03   1.615 0.106412    
## steel_rub_real           3.465e-05  1.730e-05   2.003 0.045168 *  
## weather_t               -8.009e-02  6.335e-02  -1.264 0.206131    
## weather_u                1.604e-01  3.609e-02   4.445 8.82e-06 ***
## day_of_the_week2        -2.241e+00  9.249e-01  -2.422 0.015423 *  
## day_of_the_week3        -1.450e+00  9.261e-01  -1.566 0.117378    
## day_of_the_week4         4.996e-02  9.260e-01   0.054 0.956974    
## day_of_the_week5         3.391e+00  9.280e-01   3.654 0.000259 ***
## day_of_the_week6         1.571e+00  9.315e-01   1.686 0.091733 .  
## day_of_the_week7        -5.999e+00  9.471e-01  -6.334 2.43e-10 ***
## price_demand_real_lag1   8.758e-01  2.855e-03 306.750  < 2e-16 ***
## price_demand_real_lag24  4.715e-02  2.386e-03  19.761  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40.56 on 27060 degrees of freedom
## Multiple R-squared:  0.9615, Adjusted R-squared:  0.9614 
## F-statistic: 1.648e+04 on 41 and 27060 DF,  p-value: < 2.2e-16
vif_results <- vif(M1_D_lagged)
print(vif_results)
##                             GVIF Df GVIF^(1/(2*Df))
## time                    5.674458 23        1.038460
## net_plan_effect         1.396172  1        1.181597
## tech_avg_tps            2.793571  1        1.671398
## infl                    2.389713  1        1.545870
## real_usd_rub            7.984252  1        2.825642
## oil_rub_real            4.930564  1        2.220487
## coal_rub_real           4.749254  1        2.179278
## al_rub_real             5.294014  1        2.300872
## steel_rub_real          3.976642  1        1.994152
## weather_t               2.959787  1        1.720403
## weather_u               1.500402  1        1.224909
## day_of_the_week         1.095188  6        1.007606
## price_demand_real_lag1  5.728980  1        2.393529
## price_demand_real_lag24 4.001760  1        2.000440

Irkutsk Oblast Analysis

core_vars <- Irkutsk_Oblast[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]

# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const")  # Test lags 1 to 35

plot(var_model)

# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection)  # Shows optimal lags by AIC, BIC, etc.
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     29     27      2     29
for (i in 1:35) {Irkutsk_Oblast [[paste0("price_demand_real_lag", i)]] <- lag(Irkutsk_Oblast$price_demand_real, i)}

# Remove rows with NA values (created by lagging) 
Irkutsk_Oblast_clean <- na.omit(Irkutsk_Oblast)

# Instead of using all planning variables, create:
Irkutsk_Oblast_clean$net_plan_effect <- with(Irkutsk_Oblast_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)

# Replace individual tech variables with:
Irkutsk_Oblast_clean$tech_avg_tps <- with(Irkutsk_Oblast_clean, tech_min_tps/2 + tech_max_tps/2)
Irkutsk_Oblast_clean$tech_range_tps <- with(Irkutsk_Oblast_clean, tech_max_tps - tech_min_tps)

# Replace individual tech variables with:
Irkutsk_Oblast_clean$tech_avg_hps <- with(Irkutsk_Oblast_clean, tech_min_hps/2 + tech_max_hps/2)
Irkutsk_Oblast_clean$tech_range_hps <- with(Irkutsk_Oblast_clean, tech_max_hps - tech_min_hps)

Irkutsk_Oblast_clean$tech_avg <- with(Irkutsk_Oblast_clean, tech_avg_hps + tech_avg_tps)

lag_terms_Irkutsk_Oblast <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Irkutsk_Oblast <- "price_demand_real ~ time + net_plan_effect + infl + tech_avg + real_usd_rub + coal_rub_real + al_rub_real + steel_rub_real + weather_t + weather_u + day_of_the_week+ price_demand_real_lag1 + price_demand_real_lag24 "

# Fit the model
M2_D_lagged <- lm(formula = base_formula_Irkutsk_Oblast, data = Irkutsk_Oblast_clean)
summary(M2_D_lagged)
## 
## Call:
## lm(formula = base_formula_Irkutsk_Oblast, data = Irkutsk_Oblast_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -717.88  -12.03   -0.03   11.96  683.36 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              5.315e+01  7.469e+00   7.115 1.14e-12 ***
## time1                    8.506e+00  1.871e+00   4.547 5.47e-06 ***
## time10                   5.506e+00  1.920e+00   2.868  0.00414 ** 
## time11                   1.303e+01  1.916e+00   6.799 1.07e-11 ***
## time12                   1.870e+01  1.923e+00   9.727  < 2e-16 ***
## time13                   2.668e+01  1.931e+00  13.821  < 2e-16 ***
## time14                   1.996e+01  1.941e+00  10.285  < 2e-16 ***
## time15                   8.057e+00  1.942e+00   4.148 3.36e-05 ***
## time16                   8.383e+00  1.937e+00   4.327 1.52e-05 ***
## time17                   4.534e+00  1.932e+00   2.346  0.01897 *  
## time18                  -1.712e+01  1.919e+00  -8.921  < 2e-16 ***
## time19                  -1.470e+01  1.901e+00  -7.735 1.07e-14 ***
## time2                    1.425e+01  1.866e+00   7.639 2.27e-14 ***
## time20                  -5.802e+00  1.900e+00  -3.053  0.00226 ** 
## time21                  -2.238e+00  1.906e+00  -1.174  0.24028    
## time22                  -3.359e+00  1.901e+00  -1.767  0.07730 .  
## time23                  -2.759e+00  1.872e+00  -1.474  0.14051    
## time3                    1.713e+01  1.876e+00   9.133  < 2e-16 ***
## time4                    2.182e+01  1.890e+00  11.548  < 2e-16 ***
## time5                    2.088e+01  1.908e+00  10.943  < 2e-16 ***
## time6                    1.753e+01  1.921e+00   9.125  < 2e-16 ***
## time7                    7.947e+00  1.928e+00   4.123 3.76e-05 ***
## time8                    1.185e+01  1.930e+00   6.138 8.48e-10 ***
## time9                    5.246e+00  1.929e+00   2.720  0.00653 ** 
## net_plan_effect         -8.380e-03  4.976e-04 -16.839  < 2e-16 ***
## infl                     4.874e+01  4.010e+00  12.156  < 2e-16 ***
## tech_avg                 3.986e-04  1.033e-03   0.386  0.69951    
## real_usd_rub            -2.011e+01  4.198e+00  -4.791 1.67e-06 ***
## coal_rub_real           -2.176e-05  8.058e-06  -2.700  0.00693 ** 
## al_rub_real             -9.421e-04  1.210e-03  -0.779  0.43615    
## steel_rub_real           9.308e-05  1.941e-05   4.795 1.63e-06 ***
## weather_t               -6.858e-01  8.865e-02  -7.737 1.06e-14 ***
## weather_u               -1.116e-01  4.685e-02  -2.381  0.01726 *  
## day_of_the_week2        -3.304e+00  1.008e+00  -3.279  0.00104 ** 
## day_of_the_week3        -1.211e+00  1.011e+00  -1.199  0.23063    
## day_of_the_week4         7.814e-01  1.006e+00   0.776  0.43749    
## day_of_the_week5        -6.906e-01  1.011e+00  -0.683  0.49454    
## day_of_the_week6         1.997e+00  1.019e+00   1.959  0.05016 .  
## day_of_the_week7        -1.647e+00  1.016e+00  -1.622  0.10483    
## price_demand_real_lag1   9.026e-01  2.667e-03 338.493  < 2e-16 ***
## price_demand_real_lag24  3.332e-02  2.589e-03  12.870  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.15 on 26851 degrees of freedom
## Multiple R-squared:  0.9341, Adjusted R-squared:  0.934 
## F-statistic:  9510 on 40 and 26851 DF,  p-value: < 2.2e-16
vif_results <- vif(M2_D_lagged)
print(vif_results)
##                             GVIF Df GVIF^(1/(2*Df))
## time                    1.948501 23        1.014607
## net_plan_effect         1.864507  1        1.365469
## infl                    3.365723  1        1.834591
## tech_avg                5.288970  1        2.299776
## real_usd_rub            3.578163  1        1.891603
## coal_rub_real           4.072753  1        2.018106
## al_rub_real             2.778751  1        1.666959
## steel_rub_real          4.066759  1        2.016621
## weather_t               5.196629  1        2.279612
## weather_u               1.460366  1        1.208456
## day_of_the_week         1.036831  6        1.003019
## price_demand_real_lag1  2.895717  1        1.701681
## price_demand_real_lag24 2.729342  1        1.652072

Republic of Tatarstan Analysis

core_vars <- Republic_of_Tatarstan[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]

# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const")  # Test lags 1 to 35

plot(var_model)

# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection)  # Shows optimal lags by AIC, BIC, etc.
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     31     28     27     31
for (i in 1:35) {Republic_of_Tatarstan [[paste0("price_demand_real_lag", i)]] <- lag(Republic_of_Tatarstan$price_demand_real, i)}

# Remove rows with NA values (created by lagging) 
Republic_of_Tatarstan_clean <- na.omit(Republic_of_Tatarstan)

# Instead of using all planning variables, create:
Republic_of_Tatarstan_clean$net_plan_effect <- with(Republic_of_Tatarstan_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)

# Replace individual tech variables with:
Republic_of_Tatarstan_clean$tech_avg <- with(Republic_of_Tatarstan_clean, tech_min_tps/2 + tech_max_tps/2)
Republic_of_Tatarstan_clean$tech_range <- with(Republic_of_Tatarstan_clean, tech_max_tps - tech_min_tps)

lag_terms_Republic_of_Tatarstan <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Republic_of_Tatarstan <- "price_demand_real ~ time + net_plan_effect +tech_avg + infl + oil_rub_real + coal_rub_real + steel_rub_real + weather_t + day_of_the_week + price_demand_real_lag1 + price_demand_real_lag24"

# Fit the model
M3_D_lagged <- lm(formula = base_formula_Republic_of_Tatarstan, data = Republic_of_Tatarstan_clean)
summary(M3_D_lagged)
## 
## Call:
## lm(formula = base_formula_Republic_of_Tatarstan, data = Republic_of_Tatarstan_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -498.41  -20.31    0.57   19.94  617.33 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              4.575e+01  6.682e+00   6.847 7.71e-12 ***
## time1                   -1.870e+00  2.054e+00  -0.910  0.36264    
## time10                   1.204e+02  2.382e+00  50.541  < 2e-16 ***
## time11                   1.140e+02  2.381e+00  47.875  < 2e-16 ***
## time12                   1.118e+02  2.380e+00  46.973  < 2e-16 ***
## time13                   1.175e+02  2.378e+00  49.425  < 2e-16 ***
## time14                   1.124e+02  2.378e+00  47.264  < 2e-16 ***
## time15                   1.069e+02  2.383e+00  44.854  < 2e-16 ***
## time16                   1.106e+02  2.408e+00  45.934  < 2e-16 ***
## time17                   1.052e+02  2.424e+00  43.372  < 2e-16 ***
## time18                   1.050e+02  2.410e+00  43.585  < 2e-16 ***
## time19                   1.074e+02  2.368e+00  45.363  < 2e-16 ***
## time2                    2.141e+01  2.087e+00  10.258  < 2e-16 ***
## time20                   9.057e+01  2.328e+00  38.899  < 2e-16 ***
## time21                   6.762e+01  2.260e+00  29.920  < 2e-16 ***
## time22                  -2.522e+01  2.188e+00 -11.531  < 2e-16 ***
## time23                  -6.303e+01  2.068e+00 -30.477  < 2e-16 ***
## time3                    4.839e+01  2.110e+00  22.935  < 2e-16 ***
## time4                    8.485e+01  2.114e+00  40.134  < 2e-16 ***
## time5                    1.326e+02  2.088e+00  63.527  < 2e-16 ***
## time6                    2.021e+02  2.073e+00  97.509  < 2e-16 ***
## time7                    2.164e+02  2.112e+00 102.472  < 2e-16 ***
## time8                    1.990e+02  2.236e+00  88.998  < 2e-16 ***
## time9                    1.541e+02  2.342e+00  65.821  < 2e-16 ***
## net_plan_effect         -1.690e-02  2.635e-03  -6.413 1.45e-10 ***
## tech_avg                 2.938e-03  9.348e-04   3.143  0.00168 ** 
## infl                    -2.009e+01  4.466e+00  -4.499 6.85e-06 ***
## oil_rub_real            -2.930e-05  3.750e-04  -0.078  0.93772    
## coal_rub_real           -2.246e-05  5.656e-06  -3.970 7.19e-05 ***
## steel_rub_real           9.977e-05  1.827e-05   5.461 4.78e-08 ***
## weather_t                5.686e-01  6.222e-02   9.139  < 2e-16 ***
## day_of_the_week2        -2.479e+00  1.130e+00  -2.193  0.02831 *  
## day_of_the_week3        -2.782e+00  1.129e+00  -2.463  0.01377 *  
## day_of_the_week4        -6.588e-01  1.127e+00  -0.585  0.55874    
## day_of_the_week5         3.390e+00  1.130e+00   3.001  0.00269 ** 
## day_of_the_week6        -2.658e+00  1.170e+00  -2.271  0.02315 *  
## day_of_the_week7        -1.182e+01  1.184e+00  -9.982  < 2e-16 ***
## price_demand_real_lag1   8.551e-01  3.077e-03 277.917  < 2e-16 ***
## price_demand_real_lag24  4.534e-02  2.399e-03  18.898  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.93 on 26696 degrees of freedom
## Multiple R-squared:  0.9594, Adjusted R-squared:  0.9593 
## F-statistic: 1.66e+04 on 38 and 26696 DF,  p-value: < 2.2e-16
vif_results <- vif(M3_D_lagged)
print(vif_results)
##                             GVIF Df GVIF^(1/(2*Df))
## time                    7.114545 23        1.043578
## net_plan_effect         2.154616  1        1.467861
## tech_avg                1.928068  1        1.388549
## infl                    2.397434  1        1.548365
## oil_rub_real            1.417333  1        1.190518
## coal_rub_real           1.597643  1        1.263979
## steel_rub_real          2.990977  1        1.729444
## weather_t               1.788782  1        1.337453
## day_of_the_week         1.261249  6        1.019530
## price_demand_real_lag1  6.223720  1        2.494739
## price_demand_real_lag24 3.785636  1        1.945671

Moscow Oblast Analysis

core_vars <- Moscow_Oblast[, c("price_demand_real", "infl", "coal_rub_real", "price_supply_real", "steel_rub_real")]

# Fit VAR model with different lags
var_model <- VAR(core_vars, p = 35, type = "const")  # Test lags 1 to 35

plot(var_model)

# Extract AIC/BIC
lag_selection <- VARselect(core_vars, lag.max = 35, type = "const")
print(lag_selection$selection)  # Shows optimal lags by AIC, BIC, etc.
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     28     28     27     28
for (i in 1:35) {Moscow_Oblast [[paste0("price_demand_real_lag", i)]] <- lag(Moscow_Oblast$price_demand_real, i)}

# Remove rows with NA values (created by lagging) 
Moscow_Oblast_clean <- na.omit(Moscow_Oblast)

# Instead of using all planning variables, create:
Moscow_Oblast_clean$net_plan_effect <- with(Moscow_Oblast_clean, ov_plan_value - plan_value_tps + plan_exp - plan_imp)

# Replace individual tech variables with:
Moscow_Oblast_clean$tech_avg <- with(Moscow_Oblast_clean, tech_min_tps/2 + tech_max_tps/2)
Moscow_Oblast_clean$tech_range <- with(Moscow_Oblast_clean, tech_max_tps - tech_min_tps)

lag_terms_Moscow_Oblast <- paste0("price_demand_real_lag", 1:35, collapse = " + ")
base_formula_Moscow_Oblast <- "price_demand_real ~ time + net_plan_effect +tech_avg + infl + coal_rub_real + al_rub_real + steel_rub_real + weather_t + day_of_the_week + price_demand_real_lag1 + price_demand_real_lag24"

# Fit the model
M4_D_lagged <- lm(formula = base_formula_Moscow_Oblast, data = Moscow_Oblast_clean)
summary(M4_D_lagged)
## 
## Call:
## lm(formula = base_formula_Moscow_Oblast, data = Moscow_Oblast_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -769.26  -26.55    0.98   26.64  759.91 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              7.510e+01  8.158e+00   9.206  < 2e-16 ***
## time1                   -1.149e+01  2.370e+00  -4.848 1.25e-06 ***
## time10                   1.648e+02  2.721e+00  60.581  < 2e-16 ***
## time11                   1.400e+02  2.711e+00  51.629  < 2e-16 ***
## time12                   1.236e+02  2.712e+00  45.563  < 2e-16 ***
## time13                   1.411e+02  2.703e+00  52.219  < 2e-16 ***
## time14                   1.296e+02  2.705e+00  47.908  < 2e-16 ***
## time15                   1.200e+02  2.716e+00  44.170  < 2e-16 ***
## time16                   1.289e+02  2.727e+00  47.264  < 2e-16 ***
## time17                   1.288e+02  2.768e+00  46.540  < 2e-16 ***
## time18                   1.331e+02  2.779e+00  47.885  < 2e-16 ***
## time19                   1.247e+02  2.716e+00  45.892  < 2e-16 ***
## time2                    1.558e+01  2.412e+00   6.461 1.06e-10 ***
## time20                   1.097e+02  2.670e+00  41.081  < 2e-16 ***
## time21                   8.464e+01  2.634e+00  32.134  < 2e-16 ***
## time22                  -3.870e+01  2.563e+00 -15.099  < 2e-16 ***
## time23                  -8.879e+01  2.401e+00 -36.972  < 2e-16 ***
## time3                    4.257e+01  2.450e+00  17.375  < 2e-16 ***
## time4                    7.863e+01  2.463e+00  31.924  < 2e-16 ***
## time5                    1.280e+02  2.432e+00  52.623  < 2e-16 ***
## time6                    2.059e+02  2.403e+00  85.693  < 2e-16 ***
## time7                    2.387e+02  2.380e+00 100.286  < 2e-16 ***
## time8                    2.584e+02  2.439e+00 105.963  < 2e-16 ***
## time9                    2.099e+02  2.623e+00  80.007  < 2e-16 ***
## net_plan_effect          6.925e-03  1.173e-03   5.902 3.64e-09 ***
## tech_avg                 2.540e-03  5.241e-04   4.846 1.26e-06 ***
## infl                    -4.651e+01  4.167e+00 -11.163  < 2e-16 ***
## coal_rub_real           -2.543e-05  7.036e-06  -3.615 0.000301 ***
## al_rub_real             -5.456e-03  1.524e-03  -3.582 0.000342 ***
## steel_rub_real           1.191e-04  2.377e-05   5.010 5.48e-07 ***
## weather_t                4.489e-01  6.862e-02   6.543 6.15e-11 ***
## day_of_the_week2        -3.884e+00  1.307e+00  -2.971 0.002968 ** 
## day_of_the_week3        -3.246e+00  1.310e+00  -2.477 0.013258 *  
## day_of_the_week4        -1.617e+00  1.304e+00  -1.241 0.214766    
## day_of_the_week5         3.057e+00  1.311e+00   2.331 0.019741 *  
## day_of_the_week6        -3.764e+00  1.321e+00  -2.849 0.004392 ** 
## day_of_the_week7        -1.454e+01  1.352e+00 -10.756  < 2e-16 ***
## price_demand_real_lag1   8.545e-01  3.047e-03 280.443  < 2e-16 ***
## price_demand_real_lag24  3.709e-02  2.177e-03  17.034  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 56.85 on 27086 degrees of freedom
## Multiple R-squared:  0.964,  Adjusted R-squared:  0.964 
## F-statistic: 1.91e+04 on 38 and 27086 DF,  p-value: < 2.2e-16
vif_results <- vif(M4_D_lagged)
print(vif_results)
##                             GVIF Df GVIF^(1/(2*Df))
## time                    8.436278 23        1.047451
## net_plan_effect         1.498458  1        1.224115
## tech_avg                4.194912  1        2.048149
## infl                    2.300805  1        1.516840
## coal_rub_real           1.857909  1        1.363051
## al_rub_real             2.704869  1        1.644649
## steel_rub_real          3.792091  1        1.947329
## weather_t               4.231530  1        2.057068
## day_of_the_week         1.136027  6        1.010685
## price_demand_real_lag1  6.988660  1        2.643607
## price_demand_real_lag24 3.568952  1        1.889167

Residual Analysis

Focused ARMA Modeling (Lags 1, 12, 24)

residuals_M1_D <- residuals(M1_D_lagged)
residuals_M2_D <- residuals(M2_D_lagged)
residuals_M3_D <- residuals(M3_D_lagged)
residuals_M4_D <- residuals(M4_D_lagged)

plot(residuals_M1_D)

plot(residuals_M2_D)

plot(residuals_M3_D)

plot(residuals_M4_D)